Data3888_Capstone

Author

520549896 (hlim4884)

Published

June 1, 2025

Image

Spatial Relationship

Code
# Report-wide settings: keep the tidyverse attach banner and readr's
# column-type messages out of the rendered output.
options(tidyverse.quiet = TRUE)
options(readr.show_col_types = FALSE)

# Attach the packages used by the report, hiding their startup messages.
suppressPackageStartupMessages({
  library(EBImage)
  library(tidyverse)
  library(Rtsne)
  library(readxl)
})

# Load the four input tables; all paths are relative to the working directory.
meta       <- readxl::read_excel("41467_2023_43458_MOESM4_ESM.xlsx")
boundaries <- readr::read_csv("cell_boundaries.csv")
centroids  <- readr::read_csv("cbr.csv")
threepts   <- readr::read_csv("threepts.csv")

# Uncomment to preview the first rows of each table.
#head(meta)
#head(boundaries)
#head(centroids)
#head(threepts)

Umap of FFPE Cells

Code
library(ggplot2)
library(readxl)
library(dplyr)

# Per-cell table; the plot below uses its `UMAP-X`, `UMAP-Y` and
# `Annotation` columns.
meta <- read_excel("41467_2023_43458_MOESM4_ESM.xlsx")

# Scatter of the UMAP coordinates: one small, semi-transparent point per row,
# coloured by the annotated cell type.
ggplot(data = meta) +
  geom_point(
    mapping = aes(x = `UMAP-X`, y = `UMAP-Y`, colour = Annotation),
    size = 0.5,
    alpha = 0.7
  ) +
  ggtitle("UMAP of FFPE Cells by Annotation") +
  xlab("UMAP dim. 1") +
  ylab("UMAP dim. 2") +
  theme_minimal() +
  theme(legend.position = "right")

UMAP (Uniform Manifold Approximation and Projection) projects high-dimensional data into two dimensions, placing similar data points closer together. Compared to t-SNE, UMAP is typically faster, tends to preserve more of the global structure, and scales well to large datasets. We use it because high-dimensional data cannot be visualized directly, and a 2D projection helps to identify patterns and clusters.

FFPE (Formalin-Fixed Paraffin-Embedded) is a method to preserve tissue samples by fixing them with formalin and embedding them in paraffin. It is widely used in pathology, especially cancer research, because it preserves cellular structures over long periods.

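The meta file contains UMAP coordinates (UMAP-X, UMAP-Y) and the annotated cell type for each cell. By plotting these on the UMAP plane, we can visualize how similar cells cluster and how different cell types separate in the embedding. Note that distance on this plot reflects similarity between cells, not their physical position in the tissue.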

Cell Type proportions

Code
# Share of cells per annotated cell type, drawn as horizontal bars with the
# largest share at the top.
# `count()` tallies rows per `Annotation`; dividing by the grand total turns
# the tallies into proportions so the bars match the plot title (the previous
# version plotted raw counts under a "Proportions" title).
meta %>%
  count(Annotation) %>%
  mutate(proportion = n / sum(n)) %>%
  ggplot(aes(
    x = reorder(Annotation, proportion),
    y = proportion,
    fill = Annotation
  )) +
  geom_col() +
  coord_flip() +
  theme_minimal() +
  labs(title = "Cell Type Proportions",
       x = "Cell Type", y = "Proportion of Cells") +
  # Bars are already labelled on the axis, so the fill legend is redundant.
  theme(legend.position = "none")

Code
library(tidyverse)
library(readr)

# Cell types drawn in this figure; rows from any other class folder are
# filtered out before plotting.
target_cell_types <- c(
  "Invasive_Tumor", "Macrophages_1", "Macrophages_2",
  "CD4+_T_Cells", "CD8+_T_Cells", "B_Cells",
  "DCIS_1", "DCIS_2", "Stromal",
  "Myoepi_ACTA2+", "Myoepi_KRT15+"
)

# Folder holding `cbr.csv` and one sub-folder per cell type.
# NOTE(review): absolute, machine-specific path — adjust before running on
# another machine.
base_path <- "/Users/hyungjin/Desktop/3_S1C/data3888/100"

# Only sub-folders are cell-type classes. `list.files()` also returns plain
# files such as `cbr.csv` (read from this same folder below); those used to
# slip through only because listing a non-folder yields a zero-row tibble.
cell_types <- list.files(base_path)
cell_types <- cell_types[dir.exists(file.path(base_path, cell_types))]

# One row per file in each class folder: full path, folder name, and the
# integer id captured from a `cell_<digits>_` file name (NA when the file
# name does not follow that pattern).
img_df <- map_df(cell_types, function(ct) {
  files <- list.files(file.path(base_path, ct), full.names = TRUE)
  tibble(
    path = files,
    cell_type = ct,
    cell_id = as.integer(str_match(basename(files), "cell_(\\d+)_")[, 2])
  )
})

# `cbr.csv` supplies the `axis-0` / `axis-1` columns plotted below, matched
# to each file through its `index` column.
# NOTE(review): `many-to-many` switches off dplyr's check for unexpected row
# duplication; if `index` is unique in `cbr`, "many-to-one" is the stricter
# choice — confirm against the data.
cbr <- read_csv(file.path(base_path, "cbr.csv"))
img_coords <- left_join(
  img_df, cbr,
  by = c("cell_id" = "index"),
  relationship = "many-to-many"
)

img_coords_filtered <- img_coords %>%
  filter(cell_type %in% target_cell_types)

# Fixed colour per cell type so the figures in this report stay comparable.
annotation_colors <- c(
  "Invasive_Tumor"  = "red",
  "Macrophages_1"   = "#FDB863",
  "Macrophages_2"   = "#E66101",
  "CD4+_T_Cells"    = "grey",
  "CD8+_T_Cells"    = "black",
  "B_Cells"         = "blue",
  "DCIS_1"          = "lightyellow",
  "DCIS_2"          = "yellow",
  "Myoepi_ACTA2+"   = "green",
  "Myoepi_KRT15+"   = "lightgreen",
  "Stromal" = "purple"
)

# `axis-1` on x and `axis-0` on y with a 1:1 aspect ratio.
# NOTE(review): if `axis-0` is an image row index (counted from the top), this
# plot is vertically mirrored relative to the tissue image; adding
# `scale_y_reverse()` would fix that — confirm before changing.
ggplot(img_coords_filtered, aes(x = `axis-1`, y = `axis-0`, color = cell_type)) +
  geom_point(size = 0.3, alpha = 0.2) +
  scale_color_manual(values = annotation_colors) +
  coord_fixed() +
  theme_void() +
  labs(title = "Spatial Plot of Key Cell Types")

Delete Stromal

Code
library(tidyverse)
library(readr)

# Cell types drawn in this figure: the key types with Stromal left out.
target_cell_types <- c(
  "Invasive_Tumor", "Macrophages_1", "Macrophages_2",
  "CD4+_T_Cells", "CD8+_T_Cells", "B_Cells",
  "DCIS_1", "DCIS_2",
  "Myoepi_ACTA2+", "Myoepi_KRT15+"
)

# Folder holding `cbr.csv` and one sub-folder per cell type.
# NOTE(review): absolute, machine-specific path — adjust before running on
# another machine.
base_path <- "/Users/hyungjin/Desktop/3_S1C/data3888/100"

# Only sub-folders are cell-type classes. `list.files()` also returns plain
# files such as `cbr.csv` (read from this same folder below); those used to
# slip through only because listing a non-folder yields a zero-row tibble.
cell_types <- list.files(base_path)
cell_types <- cell_types[dir.exists(file.path(base_path, cell_types))]

# One row per file in each class folder: full path, folder name, and the
# integer id captured from a `cell_<digits>_` file name (NA when the file
# name does not follow that pattern).
img_df <- map_df(cell_types, function(ct) {
  files <- list.files(file.path(base_path, ct), full.names = TRUE)
  tibble(
    path = files,
    cell_type = ct,
    cell_id = as.integer(str_match(basename(files), "cell_(\\d+)_")[, 2])
  )
})

# `cbr.csv` supplies the `axis-0` / `axis-1` columns plotted below, matched
# to each file through its `index` column.
# NOTE(review): `many-to-many` switches off dplyr's check for unexpected row
# duplication; if `index` is unique in `cbr`, "many-to-one" is the stricter
# choice — confirm against the data.
cbr <- read_csv(file.path(base_path, "cbr.csv"))
img_coords <- left_join(
  img_df, cbr,
  by = c("cell_id" = "index"),
  relationship = "many-to-many"
)
img_coords_filtered <- img_coords %>%
  filter(cell_type %in% target_cell_types)

# Fixed colour per cell type so the figures in this report stay comparable.
annotation_colors <- c(
  "Invasive_Tumor"  = "red",
  "Macrophages_1"   = "#FDB863",
  "Macrophages_2"   = "#E66101",
  "CD4+_T_Cells"    = "grey",
  "CD8+_T_Cells"    = "black",
  "B_Cells"         = "blue",
  "DCIS_1"          = "lightyellow",
  "DCIS_2"          = "yellow",
  "Myoepi_ACTA2+"   = "green",
  "Myoepi_KRT15+"   = "lightgreen"
)

# `axis-1` on x and `axis-0` on y with a 1:1 aspect ratio.
# NOTE(review): if `axis-0` is an image row index (counted from the top), this
# plot is vertically mirrored relative to the tissue image; adding
# `scale_y_reverse()` would fix that — confirm before changing.
ggplot(img_coords_filtered, aes(x = `axis-1`, y = `axis-0`, color = cell_type)) +
  geom_point(size = 0.3, alpha = 0.2) +
  scale_color_manual(values = annotation_colors) +
  coord_fixed() +
  theme_void() +
  labs(title = "Spatial Plot of Key Cell Types (Stromal X)")

Delete Stromal, Macrophages 1, 2

Code
library(tidyverse)
library(readr)

# Cell types drawn in this figure: the key types with Stromal and both
# Macrophages classes left out.
target_cell_types <- c(
  "Invasive_Tumor", "CD4+_T_Cells", "CD8+_T_Cells", "B_Cells",
  "DCIS_1", "DCIS_2", "Myoepi_ACTA2+", "Myoepi_KRT15+")

# Folder holding `cbr.csv` and one sub-folder per cell type.
# NOTE(review): absolute, machine-specific path — adjust before running on
# another machine.
base_path <- "/Users/hyungjin/Desktop/3_S1C/data3888/100"

# Only sub-folders are cell-type classes. `list.files()` also returns plain
# files such as `cbr.csv` (read from this same folder below); those used to
# slip through only because listing a non-folder yields a zero-row tibble.
cell_types <- list.files(base_path)
cell_types <- cell_types[dir.exists(file.path(base_path, cell_types))]

# One row per file in each class folder: full path, folder name, and the
# integer id captured from a `cell_<digits>_` file name (NA when the file
# name does not follow that pattern).
img_df <- map_df(cell_types, function(ct) {
  files <- list.files(file.path(base_path, ct), full.names = TRUE)
  tibble(
    path = files,
    cell_type = ct,
    cell_id = as.integer(str_match(basename(files), "cell_(\\d+)_")[, 2])
  )
})

# `cbr.csv` supplies the `axis-0` / `axis-1` columns plotted below, matched
# to each file through its `index` column.
# NOTE(review): `many-to-many` switches off dplyr's check for unexpected row
# duplication; if `index` is unique in `cbr`, "many-to-one" is the stricter
# choice — confirm against the data.
cbr <- read_csv(file.path(base_path, "cbr.csv"))
img_coords <- left_join(
  img_df, cbr,
  by = c("cell_id" = "index"),
  relationship = "many-to-many"
)

img_coords_filtered <- img_coords %>%
  filter(cell_type %in% target_cell_types)

# Fixed colour per cell type so the figures in this report stay comparable.
annotation_colors <- c(
  "Invasive_Tumor"  = "red",
  "CD4+_T_Cells"    = "grey",
  "CD8+_T_Cells"    = "black",
  "B_Cells"         = "blue",
  "DCIS_1"          = "lightyellow",
  "DCIS_2"          = "yellow",
  "Myoepi_ACTA2+"   = "green",
  "Myoepi_KRT15+"   = "lightgreen"
)

# `axis-1` on x and `axis-0` on y with a 1:1 aspect ratio.
# NOTE(review): if `axis-0` is an image row index (counted from the top), this
# plot is vertically mirrored relative to the tissue image; adding
# `scale_y_reverse()` would fix that — confirm before changing.
ggplot(img_coords_filtered, aes(x = `axis-1`, y = `axis-0`, color = cell_type)) +
  geom_point(size = 0.3, alpha = 0.2) +
  scale_color_manual(values = annotation_colors) +
  coord_fixed() +
  theme_void() +
  labs(title = "Spatial Plot of Key Cell Types (Stromal, Macrophages X)")

Invasive & DCIS_1,2

Code
library(tidyverse)
library(readr)

# Cell types drawn in this figure: invasive tumour and the two DCIS classes.
target_cell_types <- c("Invasive_Tumor", "DCIS_1", "DCIS_2")

# Folder holding `cbr.csv` and one sub-folder per cell type.
# NOTE(review): absolute, machine-specific path — adjust before running on
# another machine.
base_path <- "/Users/hyungjin/Desktop/3_S1C/data3888/100"

# Only sub-folders are cell-type classes. `list.files()` also returns plain
# files such as `cbr.csv` (read from this same folder below); those used to
# slip through only because listing a non-folder yields a zero-row tibble.
cell_types <- list.files(base_path)
cell_types <- cell_types[dir.exists(file.path(base_path, cell_types))]

# One row per file in each class folder: full path, folder name, and the
# integer id captured from a `cell_<digits>_` file name (NA when the file
# name does not follow that pattern).
img_df <- map_df(cell_types, function(ct) {
  files <- list.files(file.path(base_path, ct), full.names = TRUE)
  tibble(
    path      = files,
    cell_type = ct,
    cell_id   = as.integer(str_match(basename(files), "cell_(\\d+)_")[, 2])
  )
})

# `cbr.csv` supplies the `axis-0` / `axis-1` columns plotted below, matched
# to each file through its `index` column.
# NOTE(review): `many-to-many` switches off dplyr's check for unexpected row
# duplication; if `index` is unique in `cbr`, "many-to-one" is the stricter
# choice — confirm against the data.
cbr <- read_csv(file.path(base_path, "cbr.csv"))
img_coords <- left_join(
  img_df, cbr,
  by = c("cell_id" = "index"),
  relationship = "many-to-many"
)

img_coords_filtered <- img_coords %>%
  filter(cell_type %in% target_cell_types)

# Palette for this three-class figure only; the DCIS colours here differ from
# the ones used in the multi-type figures.
annotation_colors <- c(
  "Invasive_Tumor" = "red",
  "DCIS_1"  = "yellow",
  "DCIS_2"  = "green"
)

# `axis-1` on x and `axis-0` on y with a 1:1 aspect ratio.
# NOTE(review): if `axis-0` is an image row index (counted from the top), this
# plot is vertically mirrored relative to the tissue image; adding
# `scale_y_reverse()` would fix that — confirm before changing.
ggplot(img_coords_filtered, aes(x = `axis-1`, y = `axis-0`, color = cell_type)) +
  geom_point(size = 0.3, alpha = 0.2) +
  scale_color_manual(values = annotation_colors) +
  coord_fixed() +
  theme_void() +
  # Title typo fixed ("Invasvie" -> "Invasive").
  labs(title = "Invasive Tumor, DCIS1, DCIS2")

We characterized cellular heterogeneity at the boundary between DCIS (ductal carcinoma in situ) and invasive carcinoma. In H&E-stained breast carcinoma samples, DCIS_1 was enriched with myoepithelial markers (ACTA2, KRT15), whereas DCIS_2 showed reduced myoepithelial markers and increased invasive markers. In the invasive regions, myoepithelial cells were completely lost. These findings suggest that DCIS_2 represents a region with a high potential for invasive transition.